The data is consisting of the prisoners and stuffs were impacted by Covid-19 (deaths, case rates) in all the states in the US.
Loading in datasets
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(ggplot2)
library(here)
## here() starts at /Users/Nesli/Desktop/Github/hw-ncaliskan/final_project
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
if ( !dir.exists( here::here("final_project", "data") ) ) {
dir.create( here::here("final_project", "data", "output", ".R"), recursive = TRUE )
}
Loading in csv files
covid_prison_case <- read_csv(here("data", "covid_prison_cases.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## name = col_character(),
## abbreviation = col_character(),
## staff_tests = col_double(),
## staff_tests_with_multiples = col_double(),
## total_staff_cases = col_double(),
## staff_recovered = col_double(),
## total_staff_deaths = col_double(),
## prisoner_tests = col_double(),
## prisoner_tests_with_multiples = col_double(),
## total_prisoner_cases = col_double(),
## prisoners_recovered = col_double(),
## total_prisoner_deaths = col_double(),
## as_of_date = col_character(),
## notes = col_character()
## )
covid_prison_rate <- read_csv(here("data", "covid_prison_rates.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## name = col_character(),
## latest_week = col_character(),
## cumulative_prisoner_cases = col_double(),
## prisoner_cases_pct = col_double(),
## prisoner_case_rate = col_character(),
## cumulative_prisoner_deaths = col_double(),
## prisoner_deaths_pct = col_double(),
## prisoner_death_rate = col_character(),
## cumulative_staff_cases = col_double(),
## staff_cases_pct = col_double(),
## staff_case_rate = col_character(),
## cumulative_staff_deaths = col_double(),
## staff_deaths_pct = col_double(),
## staff_death_rate = col_character()
## )
covid_population <- read_csv(here("data", "prison_populations.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## name = col_character(),
## abbreviation = col_character(),
## march_pop = col_double(),
## april_pop = col_double(),
## june_pop = col_double(),
## july_pop = col_double(),
## aug_pop = col_double(),
## sept_pop = col_double(),
## oct_pop = col_double(),
## nov_pop = col_double(),
## as_of_date_march = col_character(),
## as_of_date_april = col_character(),
## as_of_date_june = col_character(),
## as_of_date_july = col_character(),
## as_of_date_aug = col_character(),
## as_of_date_sept = col_character(),
## as_of_date_oct = col_character(),
## as_of_date_nov = col_character()
## )
staff_population <- read_csv(here("data", "staff_populations.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## name = col_character(),
## abbreviation = col_character(),
## april_pop = col_double(),
## as_of_date = col_character(),
## notes = col_character()
## )
Renamed the column so the names for it wouldn’t overlap across different datasets when I join them together.
(staff_population %>%
rename(staff_april_pop = april_pop))
## # A tibble: 51 x 5
## name abbreviation staff_april_pop as_of_date notes
## <chr> <chr> <dbl> <chr> <chr>
## 1 Alabama AL 6259 9/30/2019 The staff number is from Sep…
## 2 Alaska AK NA <NA> <NA>
## 3 Arizona AZ 8700 5/11/2020 <NA>
## 4 Arkans… AR 4045 2/29/2020 <NA>
## 5 Califo… CA 46000 4/15/2020 CDCR's pio says that it's ro…
## 6 Colora… CO 6267 12/2/2019 Staff numbers are from a gov…
## 7 Connec… CT 6170 3/1/2020 <NA>
## 8 Delawa… DE 2530 5/8/2020 <NA>
## 9 Florida FL 22218 4/15/2020 <NA>
## 10 Georgia GA 8399 6/30/2019 <NA>
## # … with 41 more rows
Joining all three datasets together
# Merged_Covid_Data <- full_join(staff_population, covid_population, by = c("name", "abbreviation")) %>%
# full_join(covid_prison_case, by = c("name", "abbreviation"))
Filtering to see all the Florida data
Florida_Data <- covid_prison_case %>%
filter(name == "Florida") %>%
select(abbreviation, as_of_date, total_staff_cases, total_staff_deaths, total_prisoner_cases, total_prisoner_deaths)
Staff_Data_FL <- staff_population %>%
filter(name == "Florida") %>%
select(as_of_date, april_pop)
Prisoner_Data_FL <- covid_population %>%
filter(name == "Florida")
ggplot(Florida_Data,
aes(x = as.Date(as_of_date),
y= total_staff_cases),
color = "purple",
fill = "purple") +
geom_point(alpha = 0.1) +
scale_x_date("Date", date_breaks = "12 months", date_labels = "%m-%Y") +
scale_y_discrete(name = "Total Staff Cases") +
geom_smooth(method = "loess") +
geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype = "dashed") +
scale_y_continuous(name = "Total Staff Cases") +
ggtitle("Total Staff Cases Per Year") +
theme_bw()
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (stat_smooth).
## Warning: Removed 31 rows containing missing values (geom_point).
Compare prisoner cases to staff cases in Florida
ggplot(Florida_Data,
aes(x = total_prisoner_cases,
y= total_staff_cases)) +
geom_point(alpha = 0.4,
size = 6,
color = "purple",
fill = "purple") +
scale_x_continuous(name = "Total Prisoner Cases") +
scale_y_continuous(name = "Total Staff Cases") +
ggtitle("Prisoner Cases over Staff Cases in Florida") +
theme_classic()
Compare prisoner deaths to staff deaths in Florida
ggplot(Florida_Data,
aes(x = total_prisoner_deaths,
y= total_staff_deaths)) +
geom_point(alpha = 0.3,
size = 3,
color = "blue",
fill = "blue") +
scale_x_continuous(name = "Total Prisoner Deaths") +
scale_y_continuous(name = "Total Staff Deaths") +
ggtitle("Prisoner Deaths over Staff Deaths in Florida") +
theme_minimal()
## Warning: Removed 12 rows containing missing values (geom_point).
Get the summary table for multiple columns in the dataset
Summarized_Florida_Data <- Florida_Data %>%
summarise(TSC_MN = mean(total_staff_cases, na.rm = TRUE),
TSC_SD = sd(total_staff_cases, na.rm = TRUE),
TSD_MN = mean(total_staff_deaths, na.rm = TRUE),
TSD_SD = sd(total_staff_deaths, na.rm = TRUE),
TPC_MD = mean(total_prisoner_cases, na.rm = TRUE),
TPC_SD = sd(total_prisoner_cases, na.rm = TRUE),
TPD_MN = mean(total_prisoner_deaths, na.rm = TRUE),
TPD_SD = sd(total_prisoner_deaths, na.rm = TRUE),
TSC_min = min(total_staff_cases, na.rm = TRUE),
TSD_min = min(total_staff_deaths, na.rm = TRUE),
TPC_min = min(total_prisoner_cases, na.rm = TRUE),
TPD_min = min(total_prisoner_deaths, na.rm = TRUE),
TSC_max = max(total_staff_cases, na.rm = TRUE),
TSD_max = max(total_staff_deaths, na.rm = TRUE),
TPC_max = max(total_prisoner_cases, na.rm = TRUE),
TPD_max = max(total_prisoner_deaths, na.rm = TRUE))
Create a dataset for Wisconsin
Texas_Data <- covid_prison_case %>%
filter(name == "Texas") %>%
select(abbreviation, as_of_date, total_staff_cases, total_staff_deaths, total_prisoner_cases, total_prisoner_deaths)
The plot comparing prisoner deaths to staff deaths in Texas
ggplot(Texas_Data,
aes(x = total_prisoner_deaths,
y= total_staff_deaths)) +
geom_point(alpha = 0.3,
size = 3,
color = "orange",
fill = "orange") +
scale_x_continuous(name = "Total Prisoner Deaths") +
scale_y_continuous(name = "Total Staff Deaths") +
ggtitle("Prisoner Deaths over Staff Deaths in Florida") +
theme_minimal()
Staff_Stuff <- covid_prison_case %>%
select(abbreviation, total_staff_cases, total_staff_deaths)
Comparing staff dates with staff deaths by state
Staff_Stuff %>%
plotly::plot_ly(x = ~total_staff_cases,
y = ~total_staff_deaths,
color = ~abbreviation,
# mode specifies the geometric object e.g. "markers" for points, "line" for lines
mode = 'markers',
# type controls the "type" of graph e.g. 'bar', 'scatter'
type = 'scatter'
)
## Warning: Ignoring 479 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors